Load data

Duration ~ Region

Relative to the reference region (the model intercept), game duration is highly significantly longer in regions 9 and 20 and highly significantly shorter in regions 3 and 5 (p < 0.001). It is also significantly longer in regions 10 and 38 and significantly shorter in regions 8 and 18 (p < 0.05).

# Convert region to a factor so lm() fits a separate coefficient for each
# region level instead of a single slope.
df$region <- factor(df$region)
# Regress duration on region; under R's default treatment contrasts each
# coefficient is the difference in mean duration from the reference level.
model_region <- lm(duration ~ region, data = df)
# Print the coefficient table and overall fit statistics.
summary(model_region)
## 
## Call:
## lm(formula = duration ~ region, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1795.7  -332.3   -76.3   245.7  4717.3 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2157.19      29.57  72.953  < 2e-16 ***
## region3      -252.86      30.25  -8.358  < 2e-16 ***
## region5      -137.49      35.25  -3.901 9.66e-05 ***
## region6        67.58      49.39   1.369  0.17119    
## region7       216.06     132.65   1.629  0.10339    
## region8       -77.00      32.38  -2.378  0.01745 *  
## region9       265.95      68.14   3.903 9.56e-05 ***
## region10      520.56     260.31   2.000  0.04555 *  
## region13       26.06      57.12   0.456  0.64829    
## region14      402.81     518.10   0.777  0.43690    
## region15      -59.34      47.18  -1.258  0.20849    
## region17      -64.23     109.65  -0.586  0.55800    
## region18     -208.42      87.95  -2.370  0.01781 *  
## region19      -62.35     105.66  -0.590  0.55517    
## region20     1227.56     260.31   4.716 2.44e-06 ***
## region25      -74.44     102.13  -0.729  0.46606    
## region38      127.11      46.42   2.739  0.00618 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 517.3 on 9983 degrees of freedom
## Multiple R-squared:  0.04567,    Adjusted R-squared:  0.04414 
## F-statistic: 29.86 on 16 and 9983 DF,  p-value: < 2.2e-16
# Box plot of duration per region (plot() draws box plots when the x variable
# is a factor). No regression line is drawn: the region model has one
# coefficient per level, so a single straight line would not represent it.
with(df, plot(region, duration,
              main = "Duration ~ Region",
              xlab = "Region",
              ylab = "Duration",
              col = "steelblue", pch = 16))

# Default lm diagnostic plots for the fitted region model.
plot(model_region)

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

Duration ~ First blood time

For each additional 1 s delay in first blood, the game’s total duration is predicted to be 0.114 s shorter on average. Statistically, there is a tiny negative slope: later first blood is associated with marginally shorter games. Practically, that relationship is negligible; first blood time accounts for virtually none of the variability in game duration (R² ≈ 0.0006).

# Simple linear regression of duration on first_blood_time.
model_first_blood <- lm(duration ~ first_blood_time, data = df)
# Print the coefficient table and overall fit statistics.
summary(model_first_blood)
## 
## Call:
## lm(formula = duration ~ first_blood_time, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1121.0  -354.5   -96.4   256.2  4753.2 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1991.7149     8.5456 233.071   <2e-16 ***
## first_blood_time   -0.1144     0.0455  -2.515   0.0119 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 528.9 on 9998 degrees of freedom
## Multiple R-squared:  0.0006324,  Adjusted R-squared:  0.0005324 
## F-statistic: 6.327 on 1 and 9998 DF,  p-value: 0.01191
# Scatter plot of duration against first_blood_time, with the fitted
# regression line from model_first_blood drawn on top.
with(df, plot(first_blood_time, duration,
              main = "Duration ~ First Blood Time",
              xlab = "First Blood Time",
              ylab = "Duration",
              col = "steelblue", pch = 16))
abline(reg = model_first_blood, col = "firebrick", lwd = 2)

# Default lm diagnostic plots for the fitted model.
plot(model_first_blood)

Duration ~ Dire score

There is a positive slope. For each one unit increase in dire score, the game duration increases by 14.4s on average.

# Simple linear regression of duration on dire_score.
model_dire_score <- lm(duration ~ dire_score, data = df)
# Print the coefficient table and overall fit statistics.
summary(model_dire_score)
## 
## Call:
## lm(formula = duration ~ dire_score, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1201.0  -326.5   -86.7   236.0  4428.6 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1558.9239    12.8889     121   <2e-16 ***
## dire_score    14.4128     0.4118      35   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 499.4 on 9998 degrees of freedom
## Multiple R-squared:  0.1092, Adjusted R-squared:  0.1091 
## F-statistic:  1225 on 1 and 9998 DF,  p-value: < 2.2e-16
# Scatter plot of duration against dire_score, with the fitted regression
# line from model_dire_score drawn on top.
with(df, plot(dire_score, duration,
              main = "Duration ~ Dire Score",
              xlab = "Dire Score",
              ylab = "Duration",
              col = "steelblue", pch = 16))
abline(reg = model_dire_score, col = "firebrick", lwd = 2)

# Default lm diagnostic plots for the fitted model.
plot(model_dire_score)

Duration ~ Radiant score

There is a positive slope. For each one unit increase in radiant score, the game duration increases by 12.6s on average.

# Simple linear regression of duration on radiant_score.
model_radiant_score <- lm(duration ~ radiant_score, data = df)
# Print the coefficient table and overall fit statistics.
summary(model_radiant_score)
## 
## Call:
## lm(formula = duration ~ radiant_score, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1261.9  -324.3   -97.1   236.1  4315.5 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1613.0045    13.4045  120.33   <2e-16 ***
## radiant_score   12.6324     0.4331   29.17   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 507.9 on 9998 degrees of freedom
## Multiple R-squared:  0.07842,    Adjusted R-squared:  0.07833 
## F-statistic: 850.8 on 1 and 9998 DF,  p-value: < 2.2e-16
# Scatter plot of duration against radiant_score, with the fitted regression
# line from model_radiant_score drawn on top.
with(df, plot(radiant_score, duration,
              main = "Duration ~ Radiant Score",
              xlab = "Radiant Score",
              ylab = "Duration",
              col = "steelblue", pch = 16))
abline(reg = model_radiant_score, col = "firebrick", lwd = 2)

# Default lm diagnostic plots for the fitted model.
plot(model_radiant_score)

Duration ~ Experience gained at 15 mins

The scatterplot shows a clearly non-linear (polynomial or bell-shaped) pattern, so simple linear regression is not a good way to model these data.

# Simple linear regression of duration on exp_15min.
model_exp_15min <- lm(duration ~ exp_15min, data = df)
# Print the coefficient table and overall fit statistics.
summary(model_exp_15min)
## 
## Call:
## lm(formula = duration ~ exp_15min, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1141.7  -355.4   -98.6   255.6  4747.0 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.975e+03  5.290e+00 373.371   <2e-16 ***
## exp_15min   -3.250e-03  1.277e-03  -2.545   0.0109 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 528.9 on 9998 degrees of freedom
## Multiple R-squared:  0.0006474,  Adjusted R-squared:  0.0005475 
## F-statistic: 6.477 on 1 and 9998 DF,  p-value: 0.01094
# Scatter plot of duration against exp_15min, with the fitted regression
# line from model_exp_15min drawn on top.
with(df, plot(exp_15min, duration,
              main = "Duration ~ Experience gained at 15 mins",
              xlab = "Experience gained at 15 mins",
              ylab = "Duration",
              col = "steelblue", pch = 16))
abline(reg = model_exp_15min, col = "firebrick", lwd = 2)

# Default lm diagnostic plots for the fitted model.
plot(model_exp_15min)

Duration ~ Team fight duration

There is a positive correlation between team fight duration and total duration.

# Simple linear regression of duration on teamfight_duration.
model_team_duration <- lm(duration ~ teamfight_duration, data = df)
# Print the coefficient table and overall fit statistics.
summary(model_team_duration)
## 
## Call:
## lm(formula = duration ~ teamfight_duration, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1122.6  -332.2  -102.5   239.6  4463.9 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.629e+03  1.246e+01  130.73   <2e-16 ***
## teamfight_duration 1.038e+00  3.423e-02   30.32   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 506.3 on 9998 degrees of freedom
## Multiple R-squared:  0.08422,    Adjusted R-squared:  0.08413 
## F-statistic: 919.5 on 1 and 9998 DF,  p-value: < 2.2e-16
# Scatter plot of duration against teamfight_duration, with the fitted
# regression line from model_team_duration drawn on top.
with(df, plot(teamfight_duration, duration,
              main = "Duration ~ Team fight duration",
              xlab = "Team fight duration",
              ylab = "Duration",
              col = "steelblue", pch = 16))
abline(reg = model_team_duration, col = "firebrick", lwd = 2)

# Default lm diagnostic plots for the fitted model.
plot(model_team_duration)

Duration ~ Team fight deaths

There is a positive slope. For one additional team fight death, the total duration increases by 11s.

# Simple linear regression of duration on Tteamfight_deaths (the column name,
# including the doubled "T", is used as it appears in df).
model_team_death <- lm(duration ~ Tteamfight_deaths, data = df)
# Print the coefficient table and overall fit statistics.
summary(model_team_death)
## 
## Call:
## lm(formula = duration ~ Tteamfight_deaths, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1073.0  -331.2  -105.7   239.6  4412.9 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1638.6520    12.1672  134.68   <2e-16 ***
## Tteamfight_deaths   11.2367     0.3698   30.39   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 506.2 on 9998 degrees of freedom
## Multiple R-squared:  0.08454,    Adjusted R-squared:  0.08444 
## F-statistic: 923.2 on 1 and 9998 DF,  p-value: < 2.2e-16
# Scatter plot of duration against Tteamfight_deaths, with the fitted
# regression line from model_team_death drawn on top.
with(df, plot(Tteamfight_deaths, duration,
              main = "Duration ~ Team fight deaths",
              xlab = "Team fight deaths",
              ylab = "Duration",
              col = "steelblue", pch = 16))
abline(reg = model_team_death, col = "firebrick", lwd = 2)

# Default lm diagnostic plots for the fitted model.
plot(model_team_death)

Duration ~ Team fight frequency

There is a positive slope. For one additional team fight, the total duration increases by 49.6s.

# Simple linear regression of duration on teamfight_frequency.
model_team_frequency <- lm(duration ~ teamfight_frequency, data = df)
# Print the coefficient table and overall fit statistics.
summary(model_team_frequency)
## 
## Call:
## lm(formula = duration ~ teamfight_frequency, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1098.4  -333.6  -103.1   241.7  4373.0 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1620.296     12.811  126.48   <2e-16 ***
## teamfight_frequency   49.581      1.646   30.13   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 506.6 on 9998 degrees of freedom
## Multiple R-squared:  0.08324,    Adjusted R-squared:  0.08315 
## F-statistic: 907.8 on 1 and 9998 DF,  p-value: < 2.2e-16
# Scatter plot of duration against teamfight_frequency, with the fitted
# regression line from model_team_frequency drawn on top.
with(df, plot(teamfight_frequency, duration,
              main = "Duration ~ Team fight frequency",
              xlab = "Team fight frequency",
              ylab = "Duration",
              col = "steelblue", pch = 16))
abline(reg = model_team_frequency, col = "firebrick", lwd = 2)

# Default lm diagnostic plots for the fitted model.
plot(model_team_frequency)

Duration ~ Strength heroes picked by Radiant

# Convert Strength_picked_r to a factor so it enters the model as a
# categorical predictor.
df$Strength_picked_r <- factor(df$Strength_picked_r)
# Regress duration on the factor; under R's default treatment contrasts the
# coefficient is the difference in mean duration from the reference level.
model_strr <- lm(duration ~ Strength_picked_r, data = df)
# Print the coefficient table and overall fit statistics.
summary(model_strr)
## 
## Call:
## lm(formula = duration ~ Strength_picked_r, data = df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -1107   -356    -98    254   4761 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1966.975     14.935 131.704   <2e-16 ***
## Strength_picked_r1    8.985     15.971   0.563    0.574    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 529.1 on 9998 degrees of freedom
## Multiple R-squared:  3.166e-05,  Adjusted R-squared:  -6.836e-05 
## F-statistic: 0.3165 on 1 and 9998 DF,  p-value: 0.5737
# Box plot of duration for each level of Strength_picked_r (plot() draws box
# plots, not a scatter plot, when the x variable is a factor).
plot(df$Strength_picked_r, df$duration,
     pch = 16, col = "steelblue",
     xlab = "Strength heroes picked by Radiant",
     ylab = "Duration",
     main = "Duration ~ Strength heroes picked by Radiant")

# Overlay the mean duration of each level. The boxes sit at x = 1, 2, ...,
# so abline(model_strr) (y = b0 + b1 * x) would pass through the wrong
# heights; for a one-factor lm the fitted values are these per-level means.
strr_means <- tapply(df$duration, df$Strength_picked_r, mean, na.rm = TRUE)
lines(seq_along(strr_means), strr_means,
      type = "b", pch = 18, lwd = 2, col = "firebrick")

# Default lm diagnostic plots for the fitted model.
plot(model_strr)

Duration ~ Strength heroes picked by Dire

# Convert Strength_picked_d to a factor so it enters the model as a
# categorical predictor.
df$Strength_picked_d <- factor(df$Strength_picked_d)
# Regress duration on the factor; under R's default treatment contrasts the
# coefficient is the difference in mean duration from the reference level.
model_strd <- lm(duration ~ Strength_picked_d, data = df)
# Print the coefficient table and overall fit statistics.
summary(model_strd)
## 
## Call:
## lm(formula = duration ~ Strength_picked_d, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1113.4  -356.6   -98.0   254.0  4749.6 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1987.40      15.00 132.490   <2e-16 ***
## Strength_picked_d1   -14.35      16.03  -0.895    0.371    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 529.1 on 9998 degrees of freedom
## Multiple R-squared:  8.015e-05,  Adjusted R-squared:  -1.986e-05 
## F-statistic: 0.8014 on 1 and 9998 DF,  p-value: 0.3707
# Box plot of duration for each level of Strength_picked_d (plot() draws box
# plots, not a scatter plot, when the x variable is a factor).
plot(df$Strength_picked_d, df$duration,
     pch = 16, col = "steelblue",
     xlab = "Strength heroes picked by Dire",
     ylab = "Duration",
     main = "Duration ~ Strength heroes picked by Dire")

# Overlay the mean duration of each level. The boxes sit at x = 1, 2, ...,
# so abline(model_strd) (y = b0 + b1 * x) would pass through the wrong
# heights; for a one-factor lm the fitted values are these per-level means.
strd_means <- tapply(df$duration, df$Strength_picked_d, mean, na.rm = TRUE)
lines(seq_along(strd_means), strd_means,
      type = "b", pch = 18, lwd = 2, col = "firebrick")

# Default lm diagnostic plots for the fitted model.
plot(model_strd)

Duration ~ Intelligence heroes picked by Radiant

Picking an Intelligence hero on Radiant is associated with a shorter total game length (about 47 s shorter on average).

# Convert Intelligence_picked_r to a factor so it enters the model as a
# categorical predictor.
df$Intelligence_picked_r <- factor(df$Intelligence_picked_r)
# Regress duration on the factor; under R's default treatment contrasts the
# coefficient is the difference in mean duration from the reference level.
model_intr <- lm(duration ~ Intelligence_picked_r, data = df)
# Print the coefficient table and overall fit statistics.
summary(model_intr)
## 
## Call:
## lm(formula = duration ~ Intelligence_picked_r, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1135.0  -354.0   -95.8   256.2  4724.0 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             2013.02      12.11 166.206  < 2e-16 ***
## Intelligence_picked_r1   -47.18      13.46  -3.505 0.000459 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 528.8 on 9998 degrees of freedom
## Multiple R-squared:  0.001227,   Adjusted R-squared:  0.001127 
## F-statistic: 12.28 on 1 and 9998 DF,  p-value: 0.0004591
# Box plot of duration for each level of Intelligence_picked_r (plot() draws
# box plots, not a scatter plot, when the x variable is a factor).
plot(df$Intelligence_picked_r, df$duration,
     pch = 16, col = "steelblue",
     xlab = "Intelligence heroes picked by Radiant",
     ylab = "Duration",
     main = "Duration ~ Intelligence heroes picked by Radiant")

# Overlay the mean duration of each level. The boxes sit at x = 1, 2, ...,
# so abline(model_intr) (y = b0 + b1 * x) would pass through the wrong
# heights; for a one-factor lm the fitted values are these per-level means.
intr_means <- tapply(df$duration, df$Intelligence_picked_r, mean,
                     na.rm = TRUE)
lines(seq_along(intr_means), intr_means,
      type = "b", pch = 18, lwd = 2, col = "firebrick")

# Default lm diagnostic plots for the fitted model.
plot(model_intr)

Duration ~ Intelligence heroes picked by Dire

Picking an Intelligence hero on Dire is associated with a shorter total game length (about 36 s shorter on average).

# Convert Intelligence_picked_d to a factor so it enters the model as a
# categorical predictor.
df$Intelligence_picked_d <- factor(df$Intelligence_picked_d)
# Regress duration on the factor; under R's default treatment contrasts the
# coefficient is the difference in mean duration from the reference level.
model_intd <- lm(duration ~ Intelligence_picked_d, data = df)
# Print the coefficient table and overall fit statistics.
summary(model_intd)
## 
## Call:
## lm(formula = duration ~ Intelligence_picked_d, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1120.2  -356.5   -96.5   255.5  4768.5 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             2004.15      12.59 159.236   <2e-16 ***
## Intelligence_picked_d1   -35.61      13.87  -2.567   0.0103 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 528.9 on 9998 degrees of freedom
## Multiple R-squared:  0.0006588,  Adjusted R-squared:  0.0005588 
## F-statistic: 6.591 on 1 and 9998 DF,  p-value: 0.01026
# Box plot of duration for each level of Intelligence_picked_d (plot() draws
# box plots, not a scatter plot, when the x variable is a factor).
plot(df$Intelligence_picked_d, df$duration,
     pch = 16, col = "steelblue",
     xlab = "Intelligence heroes picked by Dire",
     ylab = "Duration",
     main = "Duration ~ Intelligence heroes picked by Dire")

# Overlay the mean duration of each level. The boxes sit at x = 1, 2, ...,
# so abline(model_intd) (y = b0 + b1 * x) would pass through the wrong
# heights; for a one-factor lm the fitted values are these per-level means.
intd_means <- tapply(df$duration, df$Intelligence_picked_d, mean,
                     na.rm = TRUE)
lines(seq_along(intd_means), intd_means,
      type = "b", pch = 18, lwd = 2, col = "firebrick")

# Default lm diagnostic plots for the fitted model.
plot(model_intd)

Duration ~ Agility heroes picked by Radiant

# Convert Agility_picked_r to a factor so it enters the model as a
# categorical predictor.
df$Agility_picked_r <- factor(df$Agility_picked_r)
# Regress duration on the factor; under R's default treatment contrasts the
# coefficient is the difference in mean duration from the reference level.
model_agir <- lm(duration ~ Agility_picked_r, data = df)
# Print the coefficient table and overall fit statistics.
summary(model_agir)
## 
## Call:
## lm(formula = duration ~ Agility_picked_r, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1116.4  -355.5   -97.5   255.6  4766.5 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1995.37      12.62 158.151   <2e-16 ***
## Agility_picked_r1   -24.92      13.90  -1.793    0.073 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 529 on 9998 degrees of freedom
## Multiple R-squared:  0.0003214,  Adjusted R-squared:  0.0002214 
## F-statistic: 3.214 on 1 and 9998 DF,  p-value: 0.07304
# Box plot of duration for each level of Agility_picked_r (plot() draws box
# plots, not a scatter plot, when the x variable is a factor).
plot(df$Agility_picked_r, df$duration,
     pch = 16, col = "steelblue",
     xlab = "Agility heroes picked by Radiant",
     ylab = "Duration",
     main = "Duration ~ Agility heroes picked by Radiant")

# Overlay the mean duration of each level. The boxes sit at x = 1, 2, ...,
# so abline(model_agir) (y = b0 + b1 * x) would pass through the wrong
# heights; for a one-factor lm the fitted values are these per-level means.
agir_means <- tapply(df$duration, df$Agility_picked_r, mean, na.rm = TRUE)
lines(seq_along(agir_means), agir_means,
      type = "b", pch = 18, lwd = 2, col = "firebrick")

# Default lm diagnostic plots for the fitted model.
plot(model_agir)

Duration ~ Agility heroes picked by Dire

# Convert Agility_picked_d to a factor so it enters the model as a
# categorical predictor.
df$Agility_picked_d <- factor(df$Agility_picked_d)
# Regress duration on the factor; under R's default treatment contrasts the
# coefficient is the difference in mean duration from the reference level.
model_agid <- lm(duration ~ Agility_picked_d, data = df)
# Print the coefficient table and overall fit statistics.
summary(model_agid)
## 
## Call:
## lm(formula = duration ~ Agility_picked_d, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1103.6  -356.6   -98.6   254.4  4764.4 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1985.58      12.68 156.594   <2e-16 ***
## Agility_picked_d1   -13.01      13.95  -0.932    0.351    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 529.1 on 9998 degrees of freedom
## Multiple R-squared:  8.696e-05,  Adjusted R-squared:  -1.305e-05 
## F-statistic: 0.8695 on 1 and 9998 DF,  p-value: 0.3511
# Box plot of duration for each level of Agility_picked_d (plot() draws box
# plots, not a scatter plot, when the x variable is a factor).
plot(df$Agility_picked_d, df$duration,
     pch = 16, col = "steelblue",
     xlab = "Agility heroes picked by Dire",
     ylab = "Duration",
     main = "Duration ~ Agility heroes picked by Dire")

# Overlay the mean duration of each level. The boxes sit at x = 1, 2, ...,
# so abline(model_agid) (y = b0 + b1 * x) would pass through the wrong
# heights; for a one-factor lm the fitted values are these per-level means.
agid_means <- tapply(df$duration, df$Agility_picked_d, mean, na.rm = TRUE)
lines(seq_along(agid_means), agid_means,
      type = "b", pch = 18, lwd = 2, col = "firebrick")

# Default lm diagnostic plots for the fitted model.
plot(model_agid)

Duration ~ Universal heroes picked by Radiant

Selecting a Universal hero on Radiant is associated with a longer total game length (about 64 s longer on average).

# Convert Universal_picked_r to a factor so it enters the model as a
# categorical predictor.
df$Universal_picked_r <- factor(df$Universal_picked_r)
# Regress duration on the factor; under R's default treatment contrasts the
# coefficient is the difference in mean duration from the reference level.
model_unir <- lm(duration ~ Universal_picked_r, data = df)
# Print the coefficient table and overall fit statistics.
summary(model_unir)
## 
## Call:
## lm(formula = duration ~ Universal_picked_r, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1117.8  -352.3   -97.4   254.2  4745.2 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          1928.07      10.24 188.356  < 2e-16 ***
## Universal_picked_r1    63.74      11.95   5.333 9.86e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 528.3 on 9998 degrees of freedom
## Multiple R-squared:  0.002837,   Adjusted R-squared:  0.002737 
## F-statistic: 28.44 on 1 and 9998 DF,  p-value: 9.861e-08
# Box plot of duration for each level of Universal_picked_r (plot() draws box
# plots, not a scatter plot, when the x variable is a factor).
plot(df$Universal_picked_r, df$duration,
     pch = 16, col = "steelblue",
     xlab = "Universal heroes picked by Radiant",
     ylab = "Duration",
     main = "Duration ~ Universal heroes picked by Radiant")

# Overlay the mean duration of each level. The boxes sit at x = 1, 2, ...,
# so abline(model_unir) (y = b0 + b1 * x) would pass through the wrong
# heights; for a one-factor lm the fitted values are these per-level means.
unir_means <- tapply(df$duration, df$Universal_picked_r, mean, na.rm = TRUE)
lines(seq_along(unir_means), unir_means,
      type = "b", pch = 18, lwd = 2, col = "firebrick")

# Default lm diagnostic plots for the fitted model.
plot(model_unir)

Duration ~ Universal heroes picked by Dire

Selecting a Universal hero on Dire is associated with a longer total game length (about 81 s longer on average).

# Convert Universal_picked_d to a factor so it enters the model as a
# categorical predictor.
df$Universal_picked_d <- factor(df$Universal_picked_d)
# Regress duration on the factor; under R's default treatment contrasts the
# coefficient is the difference in mean duration from the reference level.
model_unid <- lm(duration ~ Universal_picked_d, data = df)
# Print the coefficient table and overall fit statistics.
summary(model_unid)
## 
## Call:
## lm(formula = duration ~ Universal_picked_d, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1123.4  -353.4   -96.5   254.5  4739.6 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1916.512      9.993 191.782  < 2e-16 ***
## Universal_picked_d1   80.888     11.769   6.873 6.66e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 527.8 on 9998 degrees of freedom
## Multiple R-squared:  0.004703,   Adjusted R-squared:  0.004603 
## F-statistic: 47.24 on 1 and 9998 DF,  p-value: 6.66e-12
# Box plot of duration for each level of Universal_picked_d (plot() draws box
# plots, not a scatter plot, when the x variable is a factor).
plot(df$Universal_picked_d, df$duration,
     pch = 16, col = "steelblue",
     xlab = "Number of Universal heroes picked by Dire",
     ylab = "Duration",
     main = "Duration ~ Number of Universal heroes picked by Dire")

# Overlay the mean duration of each level. The boxes sit at x = 1, 2, ...,
# so abline(model_unid) (y = b0 + b1 * x) would pass through the wrong
# heights; for a one-factor lm the fitted values are these per-level means.
unid_means <- tapply(df$duration, df$Universal_picked_d, mean, na.rm = TRUE)
lines(seq_along(unid_means), unid_means,
      type = "b", pch = 18, lwd = 2, col = "firebrick")

# Default lm diagnostic plots for the fitted model.
plot(model_unid)